Passed
Push — master ( f3d4ca...90c034 )
by Night
01:13
created

UB.CSV_detectSeperator   F

Complexity

Conditions 14
Paths 34

Size

Total Lines 56
Code Lines 40

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 1 Features 1
Metric Value
cc 14
eloc 40
nc 34
nop 2
dl 0
loc 56
rs 3.6
c 2
b 1
f 1

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

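Commonly applied refactorings include Extract Method and, if many parameters or temporary variables are present, Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object.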

Complexity

Complex classes like UB.CSV_detectSeperator often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
/** global: UB */
2
//removeIf(nodejs)
3
4
5
6
7
var arrayFuncs = {

	/**
	 * Fast and simple CSV encoder for a 2D array (`this`).
	 *
	 * Each value is converted with `toString()`; `null`/`undefined` cells are written as empty cells.
	 * A value is enclosed in double quotes when it contains a double quote (which is doubled),
	 * the separator, a comma, or a line break, so the output can be decoded unambiguously.
	 *
	 * @param {Array} [headers] - Optional header cells, written as the first row. Skipped when null/undefined.
	 * @param {boolean} [trimValues] - When truthy, every value is trimmed before it is written.
	 * @param {boolean} [columnar] - When truthy, the data (not the headers) is passed through `transpose()` first.
	 * @param {string} [seperator=","] - Text placed between the cells of a row.
	 * @returns {string} The encoded text. Rows are joined with "\r\n"; there is no trailing line break.
	 */
	encodeCSV: function(headers, trimValues, columnar, seperator = ",") {
		var linesData = columnar ? this.transpose() : this;

		// converts a single cell value to its escaped text form
		var encodeCell = function(value) {

			// missing values become empty cells
			if (value == null) {
				return "";
			}

			var word = value.toString();
			if (trimValues) {
				word = word.trim();
			}

			// Quote whenever the raw text would otherwise be misread by a parser.
			// The comma check is kept even for other separators so that output
			// produced for existing callers does not change.
			var needsQuotes = word.indexOf('"') > -1
				|| word.indexOf(seperator) > -1
				|| word.indexOf(',') > -1
				|| word.indexOf('\n') > -1
				|| word.indexOf('\r') > -1;

			if (!needsQuotes) {
				return word;
			}

			// embedded quotes are escaped by doubling them
			return '"' + word.split('"').join('""') + '"';
		};

		var encodedLines = [];

		// header row first, if given
		if (headers != null) {
			encodedLines.push(headers.map(encodeCell).join(seperator));
		}

		// then one encoded line per data row
		for (var l = 0; l < linesData.length; l++) {
			encodedLines.push(linesData[l].map(encodeCell).join(seperator));
		}

		return encodedLines.join("\r\n");
	},


	/**
	 * Fast and simple TSV encoder.
	 * Same as `encodeCSV` but always uses a tab character as the separator.
	 *
	 * @param {Array} [headers] - Optional header cells, written as the first row.
	 * @param {boolean} [trimValues] - When truthy, every value is trimmed before it is written.
	 * @param {boolean} [columnar] - When truthy, the data is transposed first.
	 * @returns {string} The encoded text.
	 */
	encodeTSV: function(headers, trimValues, columnar) {
		return this.encodeCSV(headers, trimValues, columnar, "\t");
	},

	none: null,
};
89
90
// register funcs
91
UB.registerFuncs(Array.prototype, arrayFuncs);
92
93
94
95
var stringFuncs = {

	/**
	 * Decodes the given CSV string (`this`) and returns the cell data as an array.
	 *
	 * 3 modes are available:
	 * if `headers` is null - All cells are returned as a 2D array. (default)
	 * if `headers` is given and empty - The first row is stored in `headers`, the remaining cells are returned as a 2D array.
	 * if `headers` is given and non-empty - The first row of the text is skipped and all remaining rows are returned
	 *   as objects, with the given headers used as the property names.
	 *
	 * Double quotes enclose a value; inside a quoted value `""` stands for one literal quote,
	 * and the separator and line breaks are taken literally.
	 *
	 * NOTE: the whole text is passed through `trim()` before it is split, so leading/trailing
	 * whitespace of the text (including tab separators) is removed before parsing.
	 *
	 * @param {Array} [headers] - See the modes above.
	 * @param {boolean} [trimValues] - When truthy, every cell value is trimmed.
	 * @param {boolean} [columnar=false] - When truthy (and rows are not returned as objects), the result is passed through `transpose()`.
	 * @param {string} [seperator="auto"] - Cell separator character, or "auto" to detect it via `UB.CSV_detectSeperator`.
	 * @returns {Array} Rows as arrays of strings, or as objects in the third mode.
	 */
	decodeCSV: function (headers, trimValues, columnar = false, seperator = "auto") {
		var csvString = this.toString();

		// cut String into lines
		var lines = csvString.trim().splitLines();
		var sep = seperator == "auto" ? UB.CSV_detectSeperator(csvString, lines.length) : seperator;

		// config
		var hasHeaders = headers != null;
		var returnAsObjs = hasHeaders && headers.exists();

		// status
		var inQuoted = false;

		// result
		var linesData = [];
		var word = [];
		var lineWords = [];

		// finishes the word collected so far and appends it to the current row
		var endWord = function () {
			var text = word.join("");
			lineWords.push(trimValues ? text.trim() : text);
			word = [];
		};

		// per line
		for (var l = 0; l < lines.length; l++) {
			var line = lines[l];

			if (inQuoted) {

				// still inside a quoted value: this line continues the previous row,
				// so put back the line break that was removed when splitting into lines
				word.push("\n");

			} else if (l === 0 && hasHeaders) {

				// header row: store it in the caller's array, or discard it when
				// the caller supplied the property names
				lineWords = returnAsObjs ? [] : headers;

			} else {

				// regular row
				lineWords = [];
				linesData.push(lineWords);
			}

			// per char
			for (var c = 0; c < line.length; c++) {
				var ch = line.charAt(c);

				if (ch == "\"") {
					if (!inQuoted) {

						// quote means beginning quoted text
						inQuoted = true;

					} else if (line.charAt(c + 1) == "\"") {

						// doubled quote inside quoted text is one literal quote
						word.push("\"");
						c++;

					} else {

						// quote means ending quoted text
						inQuoted = false;
					}
				} else if (!inQuoted && ch == sep) {

					// separator means end of word
					endWord();

				} else {

					// normal char
					word.push(ch);
				}
			}

			// End of line ends the last word, unless a quoted value continues on the next line.
			// Empty lines keep producing an empty row rather than a row with one empty cell.
			if (!inQuoted && line.length > 0) {
				endWord();
			}
		}

		// a quote that was never closed: keep what was read instead of dropping it
		if (inQuoted) {
			endWord();
		}

		// convert array to objs
		if (returnAsObjs) {
			linesData = linesData.map(function (row) {
				var obj = {};
				for (var h = 0; h < headers.length; h++) {
					obj[headers[h]] = row[h];
				}
				return obj;
			});
		}

		// convert 2D array to columnar
		if (columnar && !returnAsObjs) {
			linesData = linesData.transpose();
		}

		return linesData;
	},

	/**
	 * Decodes the given TSV string (`this`) and returns the cell data as an array.
	 * Same as `decodeCSV` but always uses a tab character as the separator.
	 *
	 * @param {Array} [headers] - See the modes described on `decodeCSV`'s `headers` parameter: null, empty array, or property names.
	 * @param {boolean} [trimValues] - When truthy, every cell value is trimmed.
	 * @param {boolean} [columnar=false] - When truthy (and rows are not returned as objects), the result is transposed.
	 * @returns {Array} Rows as arrays of strings, or as objects when non-empty `headers` are given.
	 */
	decodeTSV: function (headers, trimValues, columnar = false) {
		return this.toString().decodeCSV(headers, trimValues, columnar, "\t");
	},

	none: null,
};
254
255
// register funcs
256
UB.registerFuncs(String.prototype, stringFuncs);
257
258
259
260
261
262
/* File Utils - NodeJS only */
263
264
var fs = require('fs');
265
var pathUtil = require('path');
266
267
arrayFuncs = {

	/**
	 * Encodes this array as CSV text and saves that text to the given file.
	 * The text is produced by `encodeCSV` and written with the string's `saveToText` method.
	 *
	 * @param {string} filePath - Passed on to `saveToText`.
	 * @param {Array} [headers=null] - Optional header cells, passed on to `encodeCSV`.
	 * @param {boolean} [trimValues=true] - Passed on to `encodeCSV`.
	 * @param {boolean} [columnar=false] - Passed on to `encodeCSV`.
	 * @param {string} [fixedSep=","] - Separator passed on to `encodeCSV`.
	 */
	saveToCSV: function(filePath, headers = null, trimValues = true, columnar = false, fixedSep = ",") {

		// encode, then hand the resulting string straight to the text writer
		this.encodeCSV(headers, trimValues, columnar, fixedSep).saveToText(filePath);
	},

	none: null,
};
281
282
// register funcs
283
UB.registerFuncs(Array.prototype, arrayFuncs);
284
285
286
stringFuncs = {

	/**
	 * Treats this string as a file path, loads that file as text and decodes it as CSV.
	 * The text is read with the string's `loadText` method and parsed with `decodeCSV`.
	 *
	 * 3 modes are available:
	 * if `headers` is null - All cells are returned as 2D array. (default)
	 * if `headers` is given and 0 length - The first row is stored in `headers`, remaining cells are returned as 2D array.
	 * if `headers` is given and >0 length - All rows are returned as objects, with the given headers treated as the prop names for the objects.
	 *
	 * @param {string} [encoding="utf8"] - Passed on to `loadText`.
	 * @param {Array} [headers=null] - See the modes above.
	 * @param {boolean} [trimValues=true] - Passed on to `decodeCSV`.
	 * @param {boolean} [columnar=false] - Passed on to `decodeCSV`.
	 * @param {string} [seperator="auto"] - Passed on to `decodeCSV`.
	 * @returns {Array|null} The decoded data, or null when `loadText` yields null/undefined.
	 */
	loadCSV: function(encoding = "utf8", headers = null, trimValues = true, columnar = false, seperator = "auto") {

		// load text file
		var text = this.toString().loadText(encoding);

		// nothing loaded -> nothing to parse; otherwise parse CSV string into Array
		return text == null ? null : text.decodeCSV(headers, trimValues, columnar, seperator);
	},

	none: null,
};
312
313
// register funcs
314
UB.registerFuncs(String.prototype, stringFuncs);
315
316
//endRemoveIf(nodejs)
317
318
319
320
321
322
// UTILS
323
/** Candidate separator characters considered by `UB.CSV_detectSeperator`. */
UB.CSV_seperators = [',', ';', ':', '\t'];

/**
 * Guesses the separator used in a CSV string by counting how often each candidate from
 * `UB.CSV_seperators` occurs outside of quoted values, and returning the most frequent one.
 *
 * A value counts as quoted only when a double quote is its first character; inside a quoted
 * value, `""` is an escaped quote and separators / line breaks are not counted.
 *
 * The last 50 characters are skipped, as before; the reason is not documented in this file.
 * Inputs of 50 characters or fewer are scanned completely, since otherwise nothing would be
 * examined.
 *
 * @param {string} csvString - The CSV text to examine.
 * @param {number} rowCount - Unused; kept so existing callers keep working.
 * @returns {string} The detected separator, or ',' when no candidate was found.
 */
UB.CSV_detectSeperator = function(csvString, rowCount) {

	// one counter per candidate separator
	// NOTE(review): assumes UB.newArray(0, n) yields n zeros - confirm against UB.newArray
	var sepCount = UB.newArray(0, UB.CSV_seperators.length);

	var quoted = false;
	var firstChar = true;  // true while the next char starts a new value
	var foundAny = false;

	var tailSkip = 50;
	var end = csvString.length > tailSkip ? csvString.length - tailSkip : csvString.length;

	for (var c = 0; c < end; c++) {
		var character = csvString.charAt(c);

		if (character === '"') {
			if (quoted) {
				if (csvString.charAt(c + 1) === '"') {
					c++;  // doubled quote inside a quoted value - skip the second quote
				} else {
					quoted = false;  // closing quote
				}
			} else if (firstChar) {
				quoted = true;  // a quote only opens a quoted value as the first char of the value
			}
			firstChar = false;
			continue;
		}

		if (!quoted) {

			// an unquoted line break starts a new value
			if (character === '\r' || character === '\n') {
				firstChar = true;
				continue;
			}

			// an unquoted candidate separator is counted and starts a new value
			var index = UB.CSV_seperators.indexOf(character);
			if (index !== -1) {
				sepCount[index]++;
				firstChar = true;
				foundAny = true;
				continue;
			}
		}

		// any other char is part of the current value
		firstChar = false;
	}

	// NOTE(review): indexOfMax presumably returns the index of the largest count - confirm
	return foundAny ? UB.CSV_seperators[sepCount.indexOfMax()] : ',';
};
381
382
383